# Summary: Creates Appendix F

#########################################################################
#-------------------Appendix F: Descriptive Stats ----------------------#
#########################################################################

# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)
# Start from an empty workspace (non-hidden objects only).
# NOTE(review): when this file is run through source() from another script,
# this also removes that script's objects from the same environment.
rm(list = ls(all.names = FALSE))

# Load libraries
library(rddensity)
library(rdrobust)
library(rdpower)

# Set WD
# setwd('~/Dataverse/')

# Load the three input files (paths are relative to the working directory).
# The objects used further down -- `tse_ibge`, `bal_checks_census` and `d` --
# are presumably supplied by the files of the matching name; confirm against
# the data archive.
load(file = "./Data/tse_ibge.RData")
load(file = "./Data/bal_checks_census.RData")
load(file = "./Data/d.RData")

# Main sample: rows of `d` with nonZeroDF == 2 (per the original author's note,
# municipalities where both parties had male and female candidates).
d1 <- subset(d, nonZeroDF == 2)

# Closest census for each election in the main sample: 2000 by default,
# 2010 for elections held after 2005.
d1$census_year <- replace(
  rep(2000, nrow(d1)),
  which(d1$ANO_ELEICAO > 2005),
  2010
)

# codeyear of the rows with eleito == 1, split by whether the party label is
# in the list the original author labels "centralized".
is_winner <- d1$eleito == 1
is_centralized <- d1$SIGLA_PARTIDO %in%
  c("PC do B", "PT", "PSDB", "PPS", "PFL", "DEM", "PL", "PR", "PDT")
centParties <- d1$codeyear[is_centralized & is_winner]
decentParties <- d1$codeyear[!is_centralized & is_winner]
rm(is_winner, is_centralized)

# Attach the TSE municipality code to the census covariates by joining on
# the IBGE code (ID_ibge in the census data, codigo_ibge in the crosswalk).
used <- merge(
  x = bal_checks_census,
  y = tse_ibge[, c("codigo_tse", "codigo_ibge")],
  by.x = "ID_ibge",
  by.y = "codigo_ibge"
)

# Census wave derived from ANO_ELEICAO: 2010 unless the election year is
# before 2006.
# NOTE(review): `year` is not read again in this script (the join key is built
# from yr_census), and if `used` has no ANO_ELEICAO column every row silently
# stays at 2010 -- confirm whether this step is still needed.
used$year <- replace(
  rep(2010, nrow(used)),
  which(used$ANO_ELEICAO < 2006),
  2000
)

# Gender-mixed races sample.
# Flag candidates with CODIGO_SEXO == 4 as female and count them per codeyear.
d$female <- as.numeric(d$CODIGO_SEXO == 4)
mixed <- aggregate(d$female, by = list(d$codeyear), FUN = sum)
names(mixed) <- c("codeyear", "mixed")
# A codeyear is flagged as mixed when exactly one of its rows is female.
mixed$mixed <- as.numeric(mixed$mixed == 1)
d <- merge(d, mixed, by = "codeyear")

# Keep mixed races that also satisfy nonZeroDF == 2 (same filter as the main
# sample).
d2 <- subset(d, mixed == 1 & nonZeroDF == 2)

# Running variables: diff_cand keeps its sign on rows with position == 1 and
# the given sex code, and is negated on every other row.
# Female version (sex code 4), blanked where mayorFem == 0.
d2$run_var_fem <- ifelse(d2$position == 1 & d2$CODIGO_SEXO == 4, 1, -1) *
  d2$diff_cand
is.na(d2$run_var_fem) <- which(d2$mayorFem == 0)
# Male version (sex code 2), blanked where mayorFem == 1.
d2$run_var_male <- ifelse(d2$position == 1 & d2$CODIGO_SEXO == 2, 1, -1) *
  d2$diff_cand
is.na(d2$run_var_male) <- which(d2$mayorFem == 1)
# Diagnostic counts of missing running-variable values.
table(is.na(d2$run_var_male))
table(is.na(d2$run_var_fem))

# Closest census for each election in the mixed sample: 2000 by default,
# 2010 for elections held after 2005.
d2$census_year <- replace(
  rep(2000, nrow(d2)),
  which(d2$ANO_ELEICAO > 2005),
  2010
)

# Build the "<municipality code>-<census year>" key that links each election
# sample to the census covariates.
census_key <- function(municipality, census_wave) {
  paste0(municipality, '-', census_wave)
}
d1$code_census <- census_key(d1$CODIGO_MUNICIPIO, d1$census_year)
d2$code_census <- census_key(d2$CODIGO_MUNICIPIO, d2$census_year)
# NOTE(review): the census side uses codigo_tse and yr_census; the keys only
# match if these are formatted like CODIGO_MUNICIPIO and census_year -- confirm.
used$code_census <- census_key(used$codigo_tse, used$yr_census)

# Express population in thousands for the descriptive tables.
used$total_pop <- used$total_pop / 1000

# Tables F.25 and F.26: descriptive statistics of the census covariates for
# the municipality-census pairs that appear in each sample.

# Write one descriptive-statistics table to a text file.
#
# Args:
#   path:         output text file; its directory is created when missing.
#   title:        heading printed as the first line of the file.
#   sample_codes: code_census keys of the sample; rows of `census` whose
#                 code_census is among them are summarised.
#   census:       data frame with a `code_census` column and the columns
#                 named in `vars`.
#   vars:         covariate columns to report, in output order.
#
# Output layout: heading, summary() of the selected columns, a second heading,
# then for each column the number of non-missing values followed by its
# standard deviation.
#
# Every result is print()ed explicitly: bare top-level expressions are only
# auto-printed at the console or under Rscript, so running the script through
# a plain source() would otherwise leave just the headings in the file. The
# sink is released through on.exit(), so an error while summarising cannot
# leave console output diverted to the file.
write_desc_table <- function(path, title, sample_codes, census,
                             vars = c("total_pop",
                                      "perc_literate",
                                      "perc_female",
                                      "perc_urban",
                                      "perc_white",
                                      "perc_rede_geral")) {
  in_sample <- census$code_census %in% unique(sample_codes)
  covariates <- census[in_sample, vars, drop = FALSE]

  dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)
  sink(path)
  on.exit(sink(), add = TRUE)

  print(title)
  print(summary(covariates))
  print("N and Standard Deviation")
  for (v in vars) {
    values <- covariates[[v]]
    print(sum(!is.na(values)))
    print(sd(values, na.rm = TRUE))
  }
  invisible(NULL)
}

# Table F.25
write_desc_table(
  "./AppendixFResults/tableF25.txt",
  "TABLE F.25: Descriptive Statistics for All Municipalities Included in the Main Sample",
  d1$code_census,
  used
)

# Table F.26
write_desc_table(
  "./AppendixFResults/tableF26.txt",
  "TABLE F.26: Descriptive Statistics for All Municipalities Included in the Gender-Mixed Races Sample",
  d2$code_census,
  used
)
